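# CNN_call takes the cleaned docs and their categories (two arrays), the number of the model to run (model_num), and the index of the cross-validation fold to hold out as the test set (fold_num). It returns the fitted model, test loss, test accuracy, fpr, tpr, AUC, test predictions, the train/test docs and labels, the tf-idf encoded train/test docs (needed for later gradient calcs), and the per-epoch validation and training losses.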
# File the best-scoring weights are checkpointed to (and reloaded from) during
# training of the models that use a validation split.
_CHECKPOINT_PATH = "weights.best.hdf5"

# Architecture table: model_num -> (stages, use_validation).
#
# Every model is Dense(50, relu) -> [stages] -> Dense(1, sigmoid). Each stage is
# a (dropout_rate, n_dense) pair meaning "one Dropout(dropout_rate) layer
# followed by n_dense Dense(100, relu) layers". use_validation selects whether
# training holds out 10% of the training fold for validation and restores the
# best checkpointed weights afterwards.
#
# To add a new model, add a new entry here instead of a new if-branch.
_MODEL_SPECS = {
    0: ((), True),
    1: (((0.1, 1),), True),
    2: (((0.1, 10),), True),
    3: (((0.1, 100),), True),
    4: ((), False),
    5: (((0.25, 10),), True),
    6: (((0.1, 10), (0.1, 10)), True),
    7: (((0.1, 20),), True),
    8: (((0.25, 10), (0.25, 10)), True),
}


def _build_dense_classifier(num_words, stages):
    """Build and compile the dense binary classifier described by ``stages``.

    Args:
        num_words: Width of the tf-idf input vectors (input dimension).
        stages: Iterable of ``(dropout_rate, n_dense)`` pairs; see
            ``_MODEL_SPECS`` for their meaning.

    Returns:
        A compiled ``keras.models.Sequential`` model (binary cross-entropy
        loss, Adam optimizer, accuracy metric).
    """
    # Imported locally, as in the original code, so that importing this module
    # does not itself require Keras.
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.layers import Dropout

    model = Sequential()
    model.add(Dense(50, input_shape=(num_words,), activation='relu'))
    for dropout_rate, n_dense in stages:
        model.add(Dropout(dropout_rate))
        for _ in range(n_dense):
            model.add(Dense(100, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def CNN_call(clean_docs, categories, model_num, fold_num):
    """Train and evaluate one bag-of-words model on one cross-validation fold.

    The data is split with a 10-fold ``StratifiedKFold``; fold ``fold_num`` is
    held out as the test set and the remaining folds are used for training.
    Documents are tf-idf encoded with a Keras ``Tokenizer`` fitted on the
    training documents only. Despite the function name, every architecture
    built here is a stack of fully-connected (Dense) layers.

    Args:
        clean_docs: Cleaned documents. Must support indexing with an integer
            index array (e.g. a numpy array of strings).
        categories: Labels aligned with ``clean_docs``, indexable the same way.
            Presumably binary 0/1 labels, since the models end in a single
            sigmoid unit trained with binary cross-entropy -- confirm against
            the caller.
        model_num: Key into ``_MODEL_SPECS`` selecting the architecture (0-8).
        fold_num: Index (0-9) of the fold to use as the test set.

    Returns:
        A 15-tuple ``(model, loss, acc, fpr, tpr, roc_auc, test_preds,
        train_doc, test_doc, train_val, test_val, train_encoded_docs,
        test_encoded_docs, val_loss, train_loss)`` where ``loss``/``acc`` are
        the test-set metrics from ``model.evaluate``, ``fpr``/``tpr``/
        ``roc_auc`` come from the ROC curve of the test predictions, and
        ``val_loss``/``train_loss`` are the per-epoch losses recorded during
        fitting. ``val_loss`` is an empty list for models trained without a
        validation split (model 4).

    Raises:
        ValueError: If ``model_num`` is not a known model number.
    """
    # Fail fast on an unknown model instead of silently returning None after
    # doing the split.
    if model_num not in _MODEL_SPECS:
        raise ValueError(
            "unknown model_num %r; expected one of %s"
            % (model_num, sorted(_MODEL_SPECS))
        )
    stages, use_validation = _MODEL_SPECS[model_num]

    # Select the requested train/test fold.
    from sklearn.model_selection import StratifiedKFold
    folder = StratifiedKFold(n_splits=10)
    folds = list(folder.split(clean_docs, categories))
    train_idx, test_idx = folds[fold_num]

    test_val = categories[test_idx]
    train_val = categories[train_idx]
    test_doc = clean_docs[test_idx]
    train_doc = clean_docs[train_idx]

    # Tokenize on the training documents only, then tf-idf encode both sets so
    # the test set never influences the vocabulary.
    from keras.preprocessing.text import Tokenizer
    tok = Tokenizer()
    tok.fit_on_texts(train_doc)
    train_encoded_docs = tok.texts_to_matrix(train_doc, mode='tfidf')
    test_encoded_docs = tok.texts_to_matrix(test_doc, mode='tfidf')

    # Input dimension of the network.
    num_words = train_encoded_docs.shape[1]
    model = _build_dense_classifier(num_words, stages)

    if use_validation:
        from keras.callbacks import ModelCheckpoint
        # NOTE(review): 'val_acc' is the metric name logged by older Keras
        # releases; newer releases log 'val_accuracy' instead, in which case
        # no checkpoint would be written. Confirm against the installed
        # Keras version.
        checkpoint = ModelCheckpoint(
            _CHECKPOINT_PATH, monitor='val_acc', verbose=1,
            save_best_only=True, mode='max')
        history = model.fit(
            train_encoded_docs, train_val, epochs=25, verbose=2,
            validation_split=.1, callbacks=[checkpoint])
        # Restore the weights from the best validation epoch.
        model.load_weights(_CHECKPOINT_PATH)
    else:
        history = model.fit(train_encoded_docs, train_val, epochs=25, verbose=2)

    # Without a validation split Keras records no 'val_loss'; the original
    # code raised KeyError here for model 4.
    val_loss = history.history.get('val_loss', [])
    train_loss = history.history['loss']

    # Test-set statistics.
    loss, acc = model.evaluate(test_encoded_docs, test_val, verbose=2)

    # Flatten the (n_samples, 1) prediction matrix to a 1-D vector.
    test_preds = model.predict(test_encoded_docs).reshape(-1)

    # ROC metrics (the thresholds are computed but not returned).
    import sklearn.metrics as metrics
    fpr, tpr, _thresholds = metrics.roc_curve(test_val, test_preds)
    roc_auc = metrics.auc(fpr, tpr)

    return (model, loss, acc, fpr, tpr, roc_auc, test_preds, train_doc,
            test_doc, train_val, test_val, train_encoded_docs,
            test_encoded_docs, val_loss, train_loss)
        